# ============================================================
# Follow the Leader (Brazil Panel) — 
# Produces:
#   Figure 1 = REWB mixed logit AMEs (within vs between)
#   Figure 2 = Switching effects (within only) from same model
#   Figure 3 = Stable trust: vote distribution (always vs never)
#   Figure 4 = Expectations REWB interactions (within only)
#   Figure 5 Mechanism = Media (social vs legacy) × REWB switching
# ============================================================


rm(list = ls())

# ----------------------------
# 0) Libraries (ONCE)
# ----------------------------
library(tidyverse)
library(glmmTMB)
library(marginaleffects)
library(estimatr)
library(broom)
library(scales)
library(tidyverse)

# ----------------------------
# 1) Load data
# ----------------------------
setwd("~/Dropbox/publications/follow_the_leader")

data<-read_excel("/Users/Mello/Dropbox/publications/follow_the_leader/painel_eleitoral_2022_ibpad.xlsx")


# ----------------------------
# 2) Helpers (avoid repetition)
# ----------------------------

# weighted proportion for binary indicator x (0/1)
weighted_prop <- function(x, w) {
  sum(w[x == 1], na.rm = TRUE) / sum(w, na.rm = TRUE)
}

# REWB decomposition: for a 0/1 variable x within id
make_rewb <- function(df, id_var, x_var, prefix) {
  id_var <- rlang::ensym(id_var)
  x_var  <- rlang::ensym(x_var)
  
  b_nm <- paste0(prefix, "_b")
  w_nm <- paste0(prefix, "_w")
  
  df %>%
    group_by(!!id_var) %>%
    mutate(
      !!b_nm := mean(!!x_var, na.rm = TRUE),
      !!w_nm := (!!x_var) - .data[[b_nm]]
    ) %>%
    ungroup()
}

tidy_ci <- function(mod, model_name) {
  broom::tidy(mod, conf.int = TRUE) %>%
    mutate(model = model_name)
}

save_plot <- function(p, filename, w = 7, h = 5) {
  ggsave(filename, plot = p, width = w, height = h, dpi = 300)
}

# ----------------------------
# 3) Core variable construction (ONCE)
# ----------------------------
data <- data %>%
  mutate(
    antiptID = if_else(polpartynegat_PT == "Sim", 1L, 0L, missing = 0L),
    ptID     = if_else(polpartyposit == "PT", 1L, 0L, missing = 0L),
    
    partisanship = case_when(
      ptID == 1L      ~ "Petistas",
      antiptID == 1L  ~ "Antipetistas",
      TRUE            ~ "Non-partisans"
    ),
    
    trust  = if_else(urna == "Confio muito", 1L, 0L, missing = 0L),
    
    vote = case_when(
      intencaovoto == "Jair Bolsonaro"            ~ "Bolsonaro",
      intencaovoto == "Luiz Inácio Lula da Silva" ~ "Lula",
      TRUE                                        ~ "Other Candidates"
    ),
    
    expectativa_bolsonaro = if_else(expectativa == "Jair Bolsonaro", 1L, 0L, missing = 0L),
    expectativa_lula      = if_else(expectativa == "Luiz Inácio Lula da Silva", 1L, 0L, missing = 0L),
    
    # weights + FE
    w_all = .data[["weights#all"]],
    onda  = factor(onda)
  ) %>%
  mutate(
    vote         = factor(vote, levels = c("Other Candidates", "Bolsonaro", "Lula")),
    partisanship = factor(partisanship, levels = c("Non-partisans", "Antipetistas", "Petistas"))
  )

#Solve issues with weights saved as character and with scientific notation
data$w_all <- as.numeric(gsub(",", ".", data$`weights#all`))



# quick party composition (if you still want it)
party <- data %>% 
  summarise(
    pct_petistas     = 100 * weighted_prop(ptID, w_all),
    pct_antipetistas = 100 * weighted_prop(antiptID, w_all),
    pct_nonpartisan  = 100 * weighted_prop(as.integer(ptID == 0 & antiptID == 0), w_all),
    pct_PL           = 100 * weighted_prop(as.integer(polpartyposit == "PL"), w_all)
  )



data_hybrid <- data %>%
  mutate(
    vote_bolsonaro = as.integer(vote == "Bolsonaro"),
    vote_lula      = as.integer(vote == "Lula"),
    time_num       = as.numeric(onda)
  ) %>%
  make_rewb(id, vote_bolsonaro, "vote_bolsonaro") %>% 
  make_rewb(id, vote_lula,      "vote_lula")%>%
  mutate(high_interest=ifelse(intpoliticalpond==3 | intpoliticalpond==4,1,0))

# ----------------------------
#mixed REWB logit 
# ----------------------------
m_rewb_rs <- glmmTMB(
  trust ~ time_num +
    vote_bolsonaro_w + vote_lula_w +
    vote_bolsonaro_b + vote_lula_b +
    antiptID + ptID +
    sexo_cotas + idade_cotas + classe_cotas + religiao_cotas +  
    situacaoecon + edu +
    (1 + time_num | id),
  data   = data_hybrid,
  family = binomial(link = "logit")
)

summary(m_rewb_rs)

# ------------------------------------------------------------
# FIGURE 1 — Within vs Between 
# ------------------------------------------------------------
ame_fig1 <- avg_slopes(
  m_rewb_rs,
  variables = c("vote_bolsonaro_w", "vote_bolsonaro_b", "vote_lula_w", "vote_lula_b")
) %>%
  as.data.frame() %>%
  mutate(
    candidate = case_when(
      str_detect(term, "bolsonaro") ~ "Bolsonaro",
      str_detect(term, "lula")      ~ "Lula"
    ),
    component = case_when(
      str_detect(term, "_w$") ~ "Within-person (change)",
      str_detect(term, "_b$") ~ "Between-person (baseline)"
    )
  )

plot_fig1 <- ggplot(ame_fig1, aes(x = component, y = estimate, color = candidate)) +
  geom_point(position = position_dodge(width = 0.4), size = 3) +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high),
                position = position_dodge(width = 0.4), width = 0) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "darkred") +
  coord_flip() +
  scale_color_manual(values = c("Bolsonaro" = "blue", "Lula" = "red")) +
  theme_bw() +
  labs(x = NULL, y = "Effect on Pr(Trust = 1) (AME)", color = NULL) +
  theme(legend.position = "bottom")

save_plot(plot_fig1, "Figure1_new.pdf", w = 7, h = 5)


# ------------------------------------------------------------
# FIGURE 2 — Switching only (within-person AMEs)
# ------------------------------------------------------------

# Manually create the interaction term
data_hybrid$interaction_bolsonaro_high <- data_hybrid$vote_bolsonaro_w * data_hybrid$high_interest

# Re-run the model with the manually created interaction term
m_rewb_rs_low_updated <- glmmTMB(
  trust ~ time_num +
    interaction_bolsonaro_high+ vote_lula_w + vote_bolsonaro_w + 
    vote_bolsonaro_b + vote_lula_b + 
    antiptID + ptID + sexo_cotas + idade_cotas + 
    classe_cotas + religiao_cotas + situacaoecon + edu + 
    (1 + time_num | id),
  data   = data_hybrid,
  family = binomial(link = "logit")
)



ame_fig2_updated <- avg_slopes(
  m_rewb_rs_low_updated,
  variables = c("vote_bolsonaro_w", "interaction_bolsonaro_high"),
  type = "response"
) %>%
  as.data.frame() %>%
  mutate(
    label = case_when(
      term == "vote_bolsonaro_w" ~ "Switch to Bolsonaro X Lower Interest",
      term == "interaction_bolsonaro_high" ~ "Switch to Bolsonaro x Higher Interest"
    )
  )

# Print the results
print(ame_fig2_updated)


# Plotting the effect of Switch to Bolsonaro (High Interest) vs Bolsonaro x Low Interest
p_info_updated_plot <- ggplot(ame_fig2_updated, aes(x = reorder(label, estimate), y = estimate)) +
  geom_point(size = 3) +  
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0) +  
  geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +  
  coord_flip() +  
  theme_minimal(base_size = 13) +  # Minimal theme for clarity
  labs(x = NULL, y = "Interaction: (Switch) × Interest in Politics\nEffect on trust", color = NULL) +  # Labels for axes
  theme(legend.position = "none")  # Remove legend

 

# Display the plot
print(p_info_updated_plot)

# Save the plot
save_plot(p_info_updated_plot, "Figure2_new.pdf", w = 7, h = 4)




# ------------------------------------------------------------
# FIGURE 3 — Stable trust: vote distribution
# ------------------------------------------------------------
trust_profile <- data %>%
  group_by(id) %>%
  summarise(
    always_trust = all(urna == "Confio muito", na.rm = TRUE),
    never_trust  = all(urna == "Não confio",   na.rm = TRUE),
    .groups = "drop"
  )

vote_dist <- data %>%
  left_join(trust_profile, by = "id") %>%
  filter(always_trust | never_trust) %>%
  mutate(trust_type = if_else(always_trust, "Always trust urna", "Never trust urna")) %>%
  group_by(id, trust_type) %>%
  summarise(vote = first(vote), .groups = "drop") %>%
  count(trust_type, vote) %>%
  group_by(trust_type) %>%
  mutate(pct = n / sum(n))

plot_fig3 <- ggplot(vote_dist, aes(x = vote, y = pct, fill = trust_type)) +
  geom_col(position = "dodge") +
  scale_y_continuous(labels = percent_format()) +
  scale_fill_manual(values = c("Always trust urna" = "blue", "Never trust urna" = "red")) +
  theme_minimal(base_size = 13) +
  labs(x = "Vote intention", y = "Share of respondents", fill = NULL)

save_plot(plot_fig3, "Figure3_new.pdf", w = 7, h = 4)

# ------------------------------------------------------------
# FIGURE 4 — Expectations (REWB-style within/between expectations)
#         Plot WITHIN only
# ------------------------------------------------------------
data_expec <- data %>%
  group_by(id) %>%
  mutate(
    expec_b_b = mean(expectativa_bolsonaro, na.rm = TRUE),
    expec_b_w = expectativa_bolsonaro - expec_b_b,
    expec_l_b = mean(expectativa_lula, na.rm = TRUE),
    expec_l_w = expectativa_lula - expec_l_b
  ) %>%
  ungroup()

controls <- trust ~ sexo_cotas + idade_cotas + classe_cotas + religiao_cotas + situacaoecon + edu

m_expec_vote_b <- lm_robust(
  update(controls, . ~ vote * expec_b_w + vote * expec_b_b + .),
  data = data_expec, fixed_effects = ~ onda, clusters = id, se_type = "CR0", weights = w_all
)

m_expec_vote_l <- lm_robust(
  update(controls, . ~ vote * expec_l_w + vote * expec_l_b + .),
  data = data_expec, fixed_effects = ~ onda, clusters = id, se_type = "CR0", weights = w_all
)

m_expec_part_b <- lm_robust(
  update(controls, . ~ partisanship * expec_b_w + .),
  data = data_expec, clusters = id, se_type = "CR0", weights = w_all
)
m_expec_part_l <- lm_robust(
  update(controls, . ~ partisanship * expec_l_w + partisanship * expec_l_b + .),
  data = data_expec, fixed_effects = ~ onda, clusters = id, se_type = "CR0", weights = w_all
)

df_plot <- bind_rows(
  tidy_ci(m_expec_vote_b, "vote_expect_B"),
  tidy_ci(m_expec_vote_l, "vote_expect_L"),
  tidy_ci(m_expec_part_b, "part_expect_B"),
  tidy_ci(m_expec_part_l, "part_expect_L")
) %>%
  filter(str_detect(term, ":(expec_[bl]_[bw])$") | str_detect(term, "expec_[bl]_[bw]")) %>%
  filter(term %in% c(
    "voteBolsonaro:expec_b_w","voteBolsonaro:expec_b_b","voteBolsonaro:expec_l_w","voteBolsonaro:expec_l_b",
    "partisanshipAntipetistas:expec_b_w","partisanshipAntipetistas:expec_b_b",
    "partisanshipAntipetistas:expec_l_w","partisanshipAntipetistas:expec_l_b"
  )) %>%
  mutate(
    block = if_else(str_detect(model, "^vote_"), "A) Vote-based (Bolsonaro voters)", "B) Partisanship-based (Antipetistas)"),
    expectation = if_else(str_detect(term, "expec_b_"), "Expect Bolsonaro wins", "Expect Lula wins"),
    component = if_else(str_detect(term, "_w$"), "Within-person (change)", "Between-person (baseline)")
  ) %>%
  filter(component == "Within-person (change)") %>%
  mutate(
    label = case_when(
      expectation == "Expect Bolsonaro wins" ~ "Expecting\nBolsonaro victory",
      expectation == "Expect Lula wins"      ~ "Expecting\nLula victory"
    ),
    block = factor(block, levels = c("A) Vote-based (Bolsonaro voters)", "B) Partisanship-based (Antipetistas)"))
  )

plot_fig4 <- ggplot(df_plot, aes(x = label, y = estimate)) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +
  geom_point(size = 3) +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0) +
  coord_flip() +
  facet_wrap(~ block) +
  theme_minimal(base_size = 13) +
  labs(x = NULL, y = "Within-person change in trust") +
  theme(strip.text = element_text(face = "bold"),
        panel.grid.minor = element_blank())

save_plot(plot_fig4, "Figure4_within_expectations.pdf", w = 8, h = 4.8)



df_plot_bol <- df_plot %>%
  filter(block == "A) Vote-based (Bolsonaro voters)")

plot_fig4_bol <- ggplot(df_plot_bol, aes(x = label, y = estimate)) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +
  geom_point(size = 3) +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0) +
  coord_flip() +
  theme_minimal(base_size = 13) +
  labs(x = NULL, y = "Within-person change in trust") +
  theme(panel.grid.minor = element_blank())

print(plot_fig4_bol)

ggsave(
  "Figure4_Bolsonaro_within_expectations.pdf",
  plot   = plot_fig4_bol,
  width  = 6,
  height = 4,
  dpi    = 300
)


# ------------------------------------------------------------
# MECHANISM — Media (social vs legacy) × switching (REWB vote)
# ------------------------------------------------------------
data_mech <- data_hybrid %>%  
  mutate(
    info_legacy = informa_1 + informa_2 + informa_3 + informa_4 + informa_6,
    info_social = informa_5 + informa_7,
    info_legacy_z = as.numeric(scale(info_legacy)),
    info_social_z = as.numeric(scale(info_social))
  ) %>%
  filter(!is.na(trust), !is.na(info_social_z), !is.na(info_legacy_z), !is.na(w_all))

m_info_rewb2 <- lm_robust(
  trust ~
    vote_bolsonaro_w * info_social +
    vote_bolsonaro_w * info_legacy +
    vote_lula_w      * info_social +
    vote_lula_w      * info_legacy +
    vote_bolsonaro_b + vote_lula_b +
    info_social + info_legacy +
    sexo_cotas + idade_cotas + classe_cotas + religiao_cotas +
    situacaoecon + edu,
  data          = data_mech,
  fixed_effects = ~ onda,
  clusters      = id,
  se_type       = "CR0",
  weights       = w_all
)

df_plot_info <- broom::tidy(m_info_rewb2, conf.int = TRUE) %>%
  filter(str_detect(term,"info_legacy") | str_detect(term,"info_social")) %>%
  mutate(
    switch = case_when(
      str_detect(term, "vote_bolsonaro_w") ~ "Switch into Bolsonaro",
      str_detect(term, "vote_lula_w")      ~ "Switch into Lula",
      str_detect(term, "info_social")      ~ "Baseline",
      str_detect(term, "info_legacy")      ~ "Baseline"
    ),
    media = case_when(
      str_detect(term, "info_social") ~ "Social media",
      str_detect(term, "info_legacy") ~ "Legacy media"
    ),
    switch = factor(switch, levels = c("Baseline","Switch into Bolsonaro", "Switch into Lula")),
    media  = factor(media,  levels = c("Social media", "Legacy media"))
  )

p_info <- ggplot(df_plot_info, aes(x = switch, y = estimate)) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +
  geom_point(size = 3) +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0) +
  coord_flip() +
  facet_wrap(~ media, ncol = 2) +
  theme_minimal(base_size = 13) +
  labs(x = NULL, y = "Interaction: (Switch) × Media use\nEffect on trust") +
  theme(strip.text = element_text(face = "bold"),
        panel.grid.minor = element_blank())

save_plot(p_info, "Figure_mechanism_social_vs_legacy_REWB.pdf", w = 8, h = 3.6)







##### Appendix 
library(dplyr)
library(estimatr)
library(modelsummary)
library(knitr)
library(kableExtra)

dir.create("appendix_tables", showWarnings = FALSE)


waves <- levels(data$onda)

retention_table <- data %>%
  group_by(onda) %>%
  summarise(n_respondents = n_distinct(id), .groups = "drop") %>%
  arrange(onda) %>%
  mutate(
    baseline_n   = first(n_respondents),
    retention_rt = n_respondents / baseline_n
  ) %>%
  select(
    Wave = onda,
    `Respondents (unique id)` = n_respondents,
    `Retention rate (vs. Wave 1)` = retention_rt
  )

tab_a1 <- retention_table %>%
  kable(
    format  = "latex",
    booktabs = TRUE,
    digits  = 3,
    caption = "Table A1. Panel retention across waves"
  ) %>%
  kable_styling(latex_options = c("hold_position"))

writeLines(as.character(tab_a1), "appendix_tables/Table_A1_Retention.tex")


# ----------------------------
# A2) Attrition indicator + baseline covariates (Round 1)
#     + Balance regression  -> Table_A2_AttritionPredictors.tex
# ----------------------------
n_total_waves <- length(waves)
baseline_wave <- waves[1]

attrition <- data %>%
  group_by(id) %>%
  summarise(
    n_waves   = n_distinct(onda),
    attrited  = as.integer(n_waves < n_total_waves),
    
    trust0    = trust[onda == baseline_wave][1],
    vote0     = vote[onda == baseline_wave][1],
    antiptID0 = antiptID[onda == baseline_wave][1],
    ptID0     = ptID[onda == baseline_wave][1],
    idade0    = idade_cotas[onda == baseline_wave][1],
    edu0      = edu[onda == baseline_wave][1],
    
    .groups = "drop"
  ) %>%
  mutate(
    vote0  = factor(vote0, levels = c("Other Candidates", "Bolsonaro", "Lula")),
    idade0 = factor(idade0)
  )

attrition_model <- lm_robust(
  attrited ~ trust0 + vote0 + antiptID0 + ptID0 + idade0 + edu0,
  data    = attrition,
  se_type = "HC2"
)

coef_map <- c(
  "trust0"         = "Baseline trust (Trust=1)",
  "vote0Bolsonaro" = "Baseline vote: Bolsonaro",
  "vote0Lula"      = "Baseline vote: Lula",
  "antiptID0"      = "Baseline Anti-PT ID",
  "ptID0"          = "Baseline PT ID",
  "edu0"           = "Education (baseline)"
)

coef_map_age <- setNames(
  paste0("Age: ", levels(attrition$idade0)[-1]),
  paste0("idade0", levels(attrition$idade0)[-1])
)

coef_map_all <- c(coef_map, coef_map_age)

tab_a2 <- modelsummary(
  list("Attrition (LPM)" = attrition_model),
  coef_map  = coef_map_all,
  statistic = "({std.error})",
  stars     = TRUE,
  gof_map   = c("nobs", "r.squared", "adj.r.squared"),
  output    = "latex",
  title     = "Table A2. Predictors of attrition (baseline covariates)",
  notes     = c(
    "Outcome equals 1 if respondent is not observed in all waves.",
    "Robust (HC2) standard errors in parentheses."
  )
)

writeLines(as.character(tab_a2), "appendix_tables/Table_A2_AttritionPredictors.tex")


# ----------------------------
# A3) Baseline means by attrition status -> Table_A3_AttritionMeans.tex
# ----------------------------
balance_means <- attrition %>%
  group_by(attrited) %>%
  summarise(
    N          = n(),
    trust0     = mean(trust0, na.rm = TRUE),
    bolsonaro0 = mean(vote0 == "Bolsonaro", na.rm = TRUE),
    lula0      = mean(vote0 == "Lula", na.rm = TRUE),
    antiptID0  = mean(antiptID0, na.rm = TRUE),
    ptID0      = mean(ptID0, na.rm = TRUE),
    edu0       = mean(edu0, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(
    `Attrition status` = if_else(attrited == 1, "Attrited before final wave", "Observed in all waves")
  ) %>%
  select(
    `Attrition status`,
    N, trust0, bolsonaro0, lula0, antiptID0, ptID0, edu0
  )

tab_a3 <- balance_means %>%
  kable(
    format  = "latex",
    booktabs = TRUE,
    digits  = 3,
    caption = "Table A3. Baseline means by attrition status"
  ) %>%
  kable_styling(latex_options = c("hold_position"))

writeLines(as.character(tab_a3), "appendix_tables/Table_A3_AttritionMeans.tex")




library(broom)
library(dplyr)
library(knitr)
library(kableExtra)

dir.create("appendix_tables", showWarnings = FALSE)

tab_mixed <- broom::tidy(m_rewb_rs, conf.int = TRUE, effects = "fixed") %>%
  mutate(
    OR     = exp(estimate),
    OR_low = exp(conf.low),
    OR_hi  = exp(conf.high)
  ) %>%
  select(term, estimate, std.error, statistic, p.value, conf.low, conf.high)

tab_mixed_tex <- tab_mixed %>%
  kable(
    format   = "latex",
    booktabs = TRUE,
    digits   = 3,
    caption  = "Table A0. Mixed logit REWB model (glmmTMB): log-odds and odds ratios"
  ) %>%
  kable_styling(latex_options = c("hold_position"))

writeLines(as.character(tab_mixed_tex),
           "appendix_tables/M1_REWB_logit_RI_RS.tex")


library(modelsummary)

dir.create("appendix_tables", showWarnings = FALSE)

for(nm in names(models_lm)){
  tex_obj <- modelsummary(
    list(models_lm[[nm]]),
    coef_map  = coef_map,
    statistic = "({std.error})",
    stars     = TRUE,
    gof_map   = c("nobs", "r.squared", "adj.r.squared"),
    output    = "latex",
    title     = paste0("Appendix: ", nm),
    notes     = notes_common
  )
  
  fn <- gsub("[^A-Za-z0-9_\\-]+", "_", nm)
  writeLines(as.character(tex_obj),
             file.path("appendix_tables", paste0(fn, ".tex")))
}

